## Introduction

The two keywords I chose to search for on Pixabay were "big animals", as I
like animals and thought there would be more than 200 results for these
keywords. Here are the top photos returned from this search.
# Summary statistics for the photos in `photo_data`.
# NOTE(review): `photo_data` is not created in this chunk; presumably it is
# the selected-photos table loaded elsewhere in the report -- confirm.

# Mean number of downloads per photo, rounded to 2 decimal places
mean_downloads <- photo_data$downloads %>%
  mean(na.rm = TRUE) %>%
  round(2)

# Largest download count of any single photo.
# The variable name keeps its original spelling because other parts of the
# report may refer to it.
max_downlaods <- photo_data$downloads %>%
  max(na.rm = TRUE)

# Mean download rate across all photos, rounded to 2 decimal places
mean_download_rate <- photo_data$download_rate %>%
  mean(na.rm = TRUE) %>%
  round(2)

# Total downloads across all photos
sum_downloads <- photo_data$downloads %>%
  sum(na.rm = TRUE)

# Mean download rate within each download-rate category.
# Missing rates are dropped here as well, so the per-category means are
# computed the same way as the overall mean above.
summs <- photo_data %>%
  group_by(download_rate_category) %>%
  summarise(mean_download_rate = mean(download_rate, na.rm = TRUE))
The mean downloads of the selected photos is 162215.83 downloads.
The max downloads for one photo is 888859 downloads.
In total there have been 8759655 downloads from all of the photos combined.
The mean download rate of the photos from people viewing the photo to downloading it is 65.8%.
Here we can see the mean download rates for each of the categories:

| download_rate_category | mean_download_rate |
|---|---|
| High | 77.78800 |
| Low | 39.10000 |
| Mid | 60.64355 |
# Plot download rate against views for the saved selection of photos,
# coloured by download-rate category.
# geom_jitter() draws the points with a small random offset added to each.
read_csv("selected_photos.csv") %>%
  ggplot() +
  geom_jitter(aes(x = views,
                  y = download_rate,
                  colour = download_rate_category)) +
  labs(title = "Download Rate vs Views",
       subtitle = "Comparing the number of views a post has in relation to its download conversion",
       x = "Views",
       y = "Download Rate %",
       caption = "Source: Pixabay")
This demonstrates creativity, as I created a scatter plot comparing the number of views with my mutated variable, download rate. This adds to my report nicely, as it shows the different download rate categories with their related conversion percentages and views.
library(tidyverse)
library(jsonlite)
library(magick)
# Read the saved Pixabay JSON response and take its `hits` element, which is
# used below as the table of photo records.
json_data <- fromJSON("pixabay_data.json")
pixabay_photo_data <- json_data$hits

selected_photos <- pixabay_photo_data %>%
  # Keep only photos with more than 70000 views
  filter(views > 70000) %>%
  # Keep only the variables used in the report
  select(previewURL, pageURL, views, downloads, likes, comments) %>%
  # Add three new variables:
  # 1. engagement: total number of interactions with the photo
  # 2. download_rate: downloads as a percentage of views, rounded to 2 dp
  # 3. download_rate_category: "High" above 70%, "Mid" above 50% and up to
  #    70%, "Low" at 50% or below
  mutate(engagement = views + downloads + likes + comments,
         download_rate = round((downloads / views) * 100, 2),
         # Conditions are checked in order. A missing rate matches none of
         # them and stays NA, as it did with the nested ifelse().
         download_rate_category = case_when(download_rate > 70 ~ "High",
                                            download_rate > 50 ~ "Mid",
                                            download_rate <= 50 ~ "Low"))

# Save the prepared table; the GIF and plot code read this file back in.
write_csv(selected_photos, "selected_photos.csv")
# Summary statistics for the photos in `selected_photos`.

# Mean number of downloads per photo, rounded to 2 decimal places
mean_downloads <- selected_photos$downloads %>%
  mean(na.rm = TRUE) %>%
  round(2)

# Largest download count of any single photo.
# The variable name keeps its original spelling because other parts of the
# report may refer to it.
max_downlaods <- selected_photos$downloads %>%
  max(na.rm = TRUE)

# Mean download rate across all photos, rounded to 2 decimal places
mean_download_rate <- selected_photos$download_rate %>%
  mean(na.rm = TRUE) %>%
  round(2)

# Total downloads across all photos
sum_downloads <- selected_photos$downloads %>%
  sum(na.rm = TRUE)

# Mean download rate within each download-rate category (printed, not
# stored). Missing rates are dropped here as well, so the per-category means
# are computed the same way as the overall mean above.
selected_photos %>%
  group_by(download_rate_category) %>%
  summarise(mean_download_rate = mean(download_rate, na.rm = TRUE))
## Animated Gif
# Build an animated GIF from the preview images of the saved photo selection.
gifdata <- read_csv("selected_photos.csv")

# Preview image URLs, with missing entries removed
img_urls <- na.omit(gifdata$previewURL)

# Read the images, join them into one sequence, scale with geometry 400,
# animate at 1 frame per second and write the result to "my_photos.gif".
# Note: `frames_animals` receives whatever image_write() returns, because
# image_write() is the outermost (last) step of the pipeline.
frames_animals <- image_write(
  image_animate(
    image_scale(
      image_join(image_read(img_urls)),
      400
    ),
    fps = 1
  ),
  "my_photos.gif"
)
# Creativity Mark
# Plot download rate against views for the saved selection of photos,
# coloured by download-rate category.
# geom_jitter() draws the points with a small random offset added to each.
read_csv("selected_photos.csv") %>%
  ggplot() +
  geom_jitter(aes(x = views,
                  y = download_rate,
                  colour = download_rate_category)) +
  labs(title = "Download Rate vs Views",
       subtitle = "Comparing the number of views a post has in relation to its download conversion",
       x = "Views",
       # download_rate is stored as a percentage (downloads / views * 100)
       y = "Download Rate %",
       caption = "Source: Pixabay")